#===============================================================================
# 4_text_study_figure_5.R
# Purpose: To replicate Figure 5 of the paper (3 figure panels separately)
#          "Results from the Text-Annotation Exercise"
#===============================================================================

# PACKAGES
#===============================================================================
library(dplyr)
library(ggplot2)
library(broom)
library(lme4)

# DATA
#===============================================================================
# - load the pairwise dataset (N = 4,417) and prepare it in one pipeline
db <- read.csv("./data/text-experiment-merged-dataset-PAIRWISE-IRR.csv") %>%
  # the column arrives as `samgeideology`; expose it as `sameideology`
  rename(sameideology = samgeideology) %>%
  # - define reference categories for the categorical variables:
  #   `training` is ordered Basic < Intermediate < Advanced, and
  #   "DIRECTED_AT" is moved to the front of the `task` levels
  mutate(
    training = factor(training,
                      levels = c("Basic", "Intermediate", "Advanced")),
    task = relevel(factor(task), ref = "DIRECTED_AT")
  ) %>%
  na.omit() # dropping 2 obs. with NAs. N = 4,415

# - load dataset with the individual message annotations
# NOTE(review): `main` is not referenced in the visible part of this script;
#               confirm whether this read is still needed.
main <- read.csv("./data/text-experiment-merged-dataset.csv")

# MAIN
#===============================================================================

# Panel A: OVERALL IRR by training session and by task
#-------------------------------------------------------------------------------
# Average pairwise IRR for every training round x task cell, with both
# grouping columns converted to display-ready factors:
#   - `task`: raw codes mapped to (line-broken) facet titles, in facet order
#   - `training`: "<round>\ntraining" labels, levels listed Advanced -> Basic
plotdb_a <- db %>%
  group_by(training, task) %>%
  summarise(irr_avg = mean(irr_cohen)) %>%
  as.data.frame() %>%
  mutate(
    # a single factor() call both relabels the codes and fixes their order
    task = factor(
      task,
      levels = c("DIRECTED_AT", "NEG_TONE", "CONS_VIEW", "PROGR_VIEW",
                 "GENDER", "ANGRY_FEEL", "ENTHU_FEEL"),
      labels = c("Directed at",
                 "Negative\ntone",
                 "Conservative\nview",
                 "Progressive\nview",
                 "Gender\nissue",
                 "Feel\nangry",
                 "Feel\nenthusiastic")
    ),
    training = factor(
      paste0(training, "\ntraining"),
      levels = c("Advanced\ntraining",
                 "Intermediate\ntraining",
                 "Basic\ntraining")
    )
  )

# Panel A figure: one facet per task, one bar per training round (axes are
# flipped, so bars run horizontally), each bar annotated with its rounded value.
fig_5a <- ggplot(plotdb_a, aes(x = training, y = irr_avg)) +
  geom_col(position = "dodge", fill = "gray60", color = "gray30") +
  # value labels: x is inherited; y is nudged 0.15 beyond the end of the bar
  geom_text(aes(y = irr_avg + 0.15, label = round(irr_avg, 2)), size = 3) +
  geom_hline(yintercept = 0) +
  facet_wrap(~ task, nrow = 1) +
  coord_flip() +
  scale_x_discrete("") +
  scale_y_continuous(
    "\nAverage IRR (Cohen's Kappa) for unique pairs of coders",
    limits = c(0, 1)
  ) +
  theme(
    legend.position = "bottom",
    panel.background = element_blank(),
    strip.background = element_blank(),
    panel.spacing = unit(1.5, "lines"),
    strip.text.x = element_text(size = 11, hjust = 0.5, vjust = 1),
    axis.title.x = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 10)
  )

# show the panel, then write exactly that plot object to disk
fig_5a

ggsave(filename = "./results/5A_kappa_training_rounds.tiff", plot = fig_5a,
       device = "tiff", width = 30, height = 7, units = "cm", dpi = 300)

# Panel B: DIFF between overall IRR in Advanced and Basic training session
#-------------------------------------------------------------------------------
# Per-task change in average pairwise IRR: mean over the "Advanced" round
# minus mean over the "Basic" round. `task` is then converted to a factor
# whose labels are the (line-broken) facet titles, in facet order.
plotdb_b <- db %>%
  group_by(task) %>%
  summarise(
    irrdiff = mean(irr_cohen[training == "Advanced"]) -
      mean(irr_cohen[training == "Basic"])
  ) %>%
  as.data.frame() %>%
  mutate(
    # a single factor() call both relabels the codes and fixes their order
    task = factor(
      task,
      levels = c("DIRECTED_AT", "NEG_TONE", "CONS_VIEW", "PROGR_VIEW",
                 "GENDER", "ANGRY_FEEL", "ENTHU_FEEL"),
      labels = c("Directed at",
                 "Negative\ntone",
                 "Conservative\nview",
                 "Progressive\nview",
                 "Gender\nissue",
                 "Feel\nangry",
                 "Feel\nenthusiastic")
    )
  )

# Panel B figure: one facet per task with a single horizontal bar showing
# `irrdiff`, annotated with its rounded value.
fig_5b <- ggplot(plotdb_b, aes(x = "Improvement", y = irrdiff)) +
  geom_col(fill = "gray60", color = "gray30", width = 0.5) +
  # labels sit just past the bar end: +0.08 for strictly positive
  # differences, -0.09 otherwise (x is inherited from the plot)
  geom_text(
    aes(y = irrdiff + ifelse(irrdiff > 0, 0.08, -0.09),
        label = round(irrdiff, 2)),
    size = 3
  ) +
  geom_hline(yintercept = 0) +
  facet_wrap(~ task, nrow = 1) +
  coord_flip() +
  # the single constant x category needs no tick or label
  scale_x_discrete("", breaks = NULL) +
  scale_y_continuous(
    "\nOverall IRR (Cohen's Kappa) improvement: Advanced v. Basic training round",
    limits = c(-0.3, 0.3)
  ) +
  theme(
    legend.position = "bottom",
    panel.background = element_blank(),
    strip.background = element_blank(),
    panel.spacing = unit(1.5, "lines"),
    strip.text.x = element_text(size = 11, hjust = 0.5, vjust = 1),
    axis.title.x = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 10)
  )

# show the panel, then write exactly that plot object to disk
fig_5b

ggsave(filename = "./results/5B_kappa_diff_basic_advanced.tiff", plot = fig_5b,
       device = "tiff", width = 30, height = 7, units = "cm", dpi = 300)

# Panel C: IDEOLOGICAL DIFF: difference in average IRR between same- and
#          different-ideology coder pairs, by training session and task
#-------------------------------------------------------------------------------
# For every training round x task cell: mean IRR of pairs with
# `sameideology == 1` minus mean IRR of pairs with `sameideology == 0`.
# Both grouping columns are then converted to display-ready factors:
#   - `task`: raw codes mapped to (line-broken) facet titles, in facet order
#   - `training`: "<round>\ntraining" labels, levels listed Advanced -> Basic
plotdb_c <- db %>%
  group_by(training, task) %>%
  summarise(
    irr_diff = mean(irr_cohen[sameideology == 1]) -
      mean(irr_cohen[sameideology == 0])
  ) %>%
  as.data.frame() %>%
  mutate(
    # a single factor() call both relabels the codes and fixes their order
    task = factor(
      task,
      levels = c("DIRECTED_AT", "NEG_TONE", "CONS_VIEW", "PROGR_VIEW",
                 "GENDER", "ANGRY_FEEL", "ENTHU_FEEL"),
      labels = c("Directed at",
                 "Negative\ntone",
                 "Conservative\nview",
                 "Progressive\nview",
                 "Gender\nissue",
                 "Feel\nangry",
                 "Feel\nenthusiastic")
    ),
    training = factor(
      paste0(training, "\ntraining"),
      levels = c("Advanced\ntraining",
                 "Intermediate\ntraining",
                 "Basic\ntraining")
    )
  )

# Panel C figure: one facet per task, one horizontal bar per training round
# showing `irr_diff`, annotated with its rounded value.
fig_5c <- ggplot(plotdb_c, aes(x = training, y = irr_diff)) +
  geom_col(position = "dodge", fill = "gray60", color = "gray30") +
  # labels sit 0.028 past the bar end, on the side the bar points to;
  # strictly positive values get an explicit "+" prefix
  geom_text(
    aes(y = irr_diff + ifelse(irr_diff > 0, 0.028, -0.028),
        label = ifelse(irr_diff > 0,
                       paste0("+", round(irr_diff, 2)),
                       round(irr_diff, 2))),
    size = 3
  ) +
  geom_hline(yintercept = 0) +
  facet_wrap(~ task, nrow = 1) +
  coord_flip() +
  scale_x_discrete("") +
  scale_y_continuous(
    "\nDifference in IRR (Cohen's Kappa) between pairs of the same v. different ideology",
    limits = c(-0.04, 0.12),
    expand = c(0, 0.02)
  ) +
  theme(
    legend.position = "bottom",
    panel.background = element_blank(),
    strip.background = element_blank(),
    panel.spacing = unit(1.5, "lines"),
    strip.text.x = element_text(size = 11, hjust = 0.5, vjust = 1),
    axis.title.x = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 10)
  )

# show the panel, then write exactly that plot object to disk
fig_5c

ggsave(filename = "./results/5C_kappa_diff_between_across_ideo.tiff",
       plot = fig_5c,
       device = "tiff", width = 30, height = 7, units = "cm", dpi = 300)
